# -*- coding: utf-8 -*-

# Scrapy settings for scrapy_zhaohaofang project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

# Name of the bot implemented by this Scrapy project.
BOT_NAME = 'scrapy_zhaohaofang'

# All spiders live in one package: it is both the module assigned to
# NEWSPIDER_MODULE and the only entry in SPIDER_MODULES.
NEWSPIDER_MODULE = 'scrapy_zhaohaofang.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'scrapy_zhaohaofang (+http://www.yourdomain.com)'

# Whether to obey robots.txt rules. Disabled for this project.
ROBOTSTXT_OBEY = False

# Logging configuration: logging is switched on at INFO level and written
# to a fixed file on disk.
LOG_ENABLED = True
LOG_LEVEL = 'INFO'
LOG_FILE = '/log/python/scrapy/zhf/log.log'
# Also route the process's standard output into the log.
LOG_STDOUT = True

# Delay downloads to reduce the risk of being banned by the target site.
DOWNLOAD_DELAY = 5
# NOTE(review): the stock Scrapy template comment kept further down in this
# file says the download delay honors only one of the two limits below; both
# are set here with the same value -- confirm which one is intended.
CONCURRENT_REQUESTS_PER_DOMAIN = 6
CONCURRENT_REQUESTS_PER_IP = 6

# Disable retrying of requests.
RETRY_ENABLED = False
# Disable redirects.
REDIRECT_ENABLED = False
# Download timeout.
# NOTE(review): the previous comment mentioned 15, but the configured value
# is 10 -- confirm which one is intended.
DOWNLOAD_TIMEOUT = 10
# Cookie handling.
# NOTE(review): the previous comment said "disable cookies", but the value
# below enables them -- confirm which one is intended.
COOKIES_ENABLED = True

# Storage path for downloaded images.
IMAGES_STORE = '/usr/webApp/'

# Database connection parameters (presumably MySQL, given port 3306).
# Nothing in this file reads DB_CONF; presumably the project's DB pipelines
# do -- verify against the pipeline code.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or an untracked config file.
DB_CONF = {
    'host': "127.0.0.1",
    'user': "root",
    'passwd': "Fs123456",
    'port': 3306,
    'db': "zhaohaofang_db",
}


# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#    'scrapy_zhaohaofang.middlewares.ScrapyZhaohaofangSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
#    'scrapy_zhaohaofang.middlewares.ScrapyZhaohaofangDownloaderMiddleware': 543,
# }

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
#
# Three stages are registered with ascending order values (1 < 20 < 35);
# judging by the class names: a first DB stage, a picture stage, and a DB
# stage that runs after the pictures.
ITEM_PIPELINES = {
    # Stage 1: first database pipeline.
    'scrapy_zhaohaofang.pipelines.ScrapyZhaohaofangDBFirstPipeline': 1,
    # Stage 2: picture pipeline.
    'scrapy_zhaohaofang.pipelines.ScrapyZhaohaofangPicPipeline': 20,
    # Stage 3: database pipeline placed after the picture stage.
    'scrapy_zhaohaofang.pipelines.ScrapyZhaohaofangDBAfterPicPipeline': 35,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Reference: https://doc.scrapy.org/en/latest/topics/autothrottle.html
# Whether to enable automatic adjustment of the download delay.
AUTOTHROTTLE_ENABLED = True
# Initial (minimum) delay. Original author's note: the DOWNLOAD_DELAY value
# set earlier in this file is respected, but its use is not forced.
# NOTE(review): this start delay (1) is lower than DOWNLOAD_DELAY (5) --
# confirm the effective delay against the AutoThrottle documentation.
AUTOTHROTTLE_START_DELAY = 1
# Peak value the automatic adjustment may reach when the network fluctuates.
# NOTE(review): this equals DOWNLOAD_DELAY (5); confirm that the adjustable
# range this leaves is what is intended.
AUTOTHROTTLE_MAX_DELAY = 5
# Average number of concurrent requests to aim for (only a target: Scrapy
# steers requests towards this value but will not necessarily reach it).
AUTOTHROTTLE_TARGET_CONCURRENCY = 5.0
# Whether to enable throttling log output.
AUTOTHROTTLE_DEBUG = True

